# Global knitr chunk options for the report.
# Messages and warnings follow `params$debug`, chunk errors follow
# `params$error`, and `results` is "asis" only when `params$interact` is TRUE.
knitr::opts_chunk$set(
  echo = TRUE,
  message = params$debug,
  warning = params$debug,
  cache = FALSE,
  error = params$error,
  results = if (isTRUE(params$interact)) "asis" else "markup"
)
### Create Output Directory ###
# `outfile_dir` is always defined: later chunks pass it to download_button()
# and save_output() whether or not output is saved, so leaving it undefined
# only worked as long as those callees never evaluated the argument.
outfile_dir <- file.path(params$output_dir, "EpiCompare_file")
# The directory itself is only created when saving was requested.
# isTRUE() treats NULL/NA as "do not save" instead of erroring in `if`.
if (isTRUE(params$save_output) && !dir.exists(outfile_dir)) {
  dir.create(outfile_dir, showWarnings = FALSE, recursive = TRUE)
}
#### ------ Prepare genome builds ------ ####
# `genome_build` is either a single string (e.g. "hg19") or a named list,
# e.g. list(reference = "hg19", peakfiles = "hg38", blacklist = "hg19").
builds <- prepare_genome_builds(
  genome_build = params$genome_build,
  blacklist = params$blacklist
)
# Build that peaklist, reference and blacklist are standardised to below;
# it comes from `genome_build_output`, not a fixed hg19.
output_build <- prepare_output_build(params$genome_build_output)
#### ------ Prepare peaklist(s) ------ ####
# Load the peak files (default file names are used when the list is unnamed),
# then lift them over to the output build.
peaklist <- prepare_peaklist(peaklist = params$peakfiles)
peaklist <- liftover_grlist(
  grlist = peaklist,
  input_build = builds$peaklist,
  output_build = output_build
)

#### ------ Prepare reference(s) ------ ####
reference <- prepare_reference(reference = params$reference)
reference <- liftover_grlist(
  grlist = reference,
  input_build = builds$reference,
  output_build = output_build
)

#### ------ Prepare blacklist ------ ####
blacklist <- prepare_blacklist(
  blacklist = params$blacklist,
  output_build = output_build,
  blacklist_build = builds$blacklist
)

### Standardise peaklist(s) ###
# Note: `peaklist` itself is overwritten with the tidied version here.
peaklist <- tidy_peakfile(
  peaklist = peaklist,
  blacklist = blacklist
)
peaklist_tidy <- peaklist

### Standardise reference(s) ###
# When a reference is supplied it is tidied and appended to `peaklist_tidy`;
# otherwise `reference_tidy` stays NULL.
if (is.null(reference)) {
  reference_tidy <- reference
} else {
  reference_tidy <- tidy_peakfile(
    peaklist = reference,
    blacklist = blacklist
  )
  peaklist_tidy <- c(peaklist_tidy, reference_tidy)
}
### Obtain Genome Annotation ###
# Annotation object for the output build; passed as `txdb` to later plots.
txdb <- check_genome_build(genome_build = output_build)

### Dynamic Figure Height ###
# Height is derived from the number of entries in `peaklist_tidy`
# (samples plus reference, if any).
fig_height <- fig_length(
  default_size = 7,
  number_of_items = length(peaklist_tidy),
  max_items = 10
)
# Decide whether a reference-dependent plot should be generated.
#
# arg:       the plot's on/off parameter; only an exact TRUE enables the plot.
# reference: the reference peak set, or NULL when none was supplied.
#
# Returns TRUE only when the plot is requested and a reference exists.
# When the plot is requested but `reference` is NULL, a note is written to
# the output with cat() and FALSE is returned.
needs_ref <- function(arg,
                      reference=NULL){
  # Plot not requested: nothing to report.
  if (!isTRUE(arg)) {
    return(FALSE)
  }
  # Requested but impossible without a reference: tell the reader why.
  if (is.null(reference)) {
    cat("NOTE: This plot is not generated when reference=NULL.")
    return(FALSE)
  }
  TRUE
}
EpiCompare
compares epigenomic datasets for quality control and benchmarking
purposes.
The report consists of three sections:
General Metrics: Metrics on peaks for each sample: % blacklisted and non-standard peaks, peak widths, fragments, duplication rates.
Peak Overlap: Frequency, percentage, statistical significance of overlapping and non-overlapping peaks. This also includes upset, precision-recall, and correlation plots.
Functional
Annotation: Functional annotation (ChromHMM
analysis, peak annotation, and enrichment analysis) of peaks. This also
includes peak distributions around transcription start sites
(TSS).
# List every entry of `peaklist_tidy` as a numbered " - File i: name" item,
# separated by blank lines.
cat(
  paste0(
    " - File ", seq_along(names(peaklist_tidy)), ": ",
    names(peaklist_tidy),
    collapse = "\n\n"
  )
)
File 1: idr
File 2: mspc
Processed, filtered and lifted files for: peaklist,
reference, blacklist
# Download button for the processed inputs; the file name carries the
# output build. This button is always included.
download_button(
  object = list(
    peaklist = peaklist,
    reference = reference_tidy,
    blacklist = blacklist
  ),
  save_output = params$save_output,
  filename = paste0("processed_peakfiles_", output_build),
  outfile_dir = outfile_dir,
  add_download_button = TRUE
)
The EpiCompare function call used to generate the
report:
# Build the EpiCompare() call text for this report and print it.
cmd <- report_command(
  params = params,
  peaklist_tidy = peaklist_tidy,
  reference_tidy = reference_tidy
)
cat(cmd)
EpiCompare(peakfiles = list('idr','mspc'),
genome_build = list('hg38','hg38','new("GRanges", seqnames = new("Rle", values = 1:24, lengths = c(54, 38, 31, 23, 22, 18, 14, 34, 35, 10, 13, 141, 22, 24, 19, 47, 69, 41, 32, 68, 51, 45, 47, 12), elementMetadata = NULL, metadata = list()), ranges = new("IRanges", start = c(628904, 5850088, 8909611, 9574581, 32043824, 33818965, 38674336, 50017082, 52996950, 55372489, 67971777, 73258721, 76971069, 93936366, 93937448, 102160408, 103620976, 106803433, 106804022, 106804754, 121609949, 125166232, 143184600, 146992423, 158449074, 158872115,
159295112, 169473896, 170006205, 172710351, 181422612, 191961695, 195288049, 199487950, 214709796, 215499616, 226652018, 227699753, 229019366, 233139986, 235520205, 235537406, 235538900, 235540244, 235540887, 235870626, 237940596, 237941046, 237941894, 237943029, 237943491, 237945286, 237948984, 237951295, 2235556, 19746629, 19747315, 25638377, 26873148, 30565119, 36432965, 36434048, 37600617, 37601247, 37601885, 38481301, 38782601, 39000366, 42066793, 45577926, 46706230, 47633791, 55597862, 55626795,
57668683, 59261794, 69590539, 69591476, 69592356, 69592777, 69594379, 69595142, 69595682, 77166389, 79411057, 89786505, 100057236, 112894489, 115056513, 123032372, 125819622, 133689374, 8023288, 10507707, 10768340, 10815185, 24839564, 27850563, 47323882, 50424040, 51081364, 64187169, 65069484, 73510501, 81551735, 81553836, 81556153, 81556718, 87813428, 87815684, 103270628, 103403271, 103404015, 103404780, 103405810, 103406654, 103407111, 103408090, 103410075, 110876920, 114021167, 123003426, 123139920,
9924, 2539175, 3887956, 19795478, 20769414, 21052951, 22005656, 27925109, 31247542, 31247964, 34665178, 40286246, 41363463, 41698592, 41699049, 49817253, 62773866, 80623984, 101486971, 123053922, 126583200, 126584138, 130315426, 16226301, 25984719, 31866924, 33516899, 36065386, 40768207, 53891452, 55971454, 56688342, 72344212, 75592085, 83688314, 84521525, 84522849, 88308158, 95692550, 95693014, 95694450, 95696146, 105488068, 107058663, 109423945, 16000601, 23426307, 32483954, 37490107, 40643841,
43116743, 45238636, 45430379, 46048458, 46847041, 51587296, 83587332, 83588230, 84171263, 84171839, 84173509, 84174280, 86498938, 17058501, 30477566, 32529780, 34715311, 35396111, 40133888, 41157029, 52100392, 54583732, 58152410, 58153293, 58155860, 67040731, 91960164, 3367431, 3369659, 3370151, 3370933, 3371689, 10719291, 10720418, 10721236, 10721901, 10723424, 10723816, 10724416, 20720930, 20721366, 20722104, 34071572, 34131997, 34571483, 34661169, 34919142, 35966578, 38275768, 46380677, 46386377,
46388623, 46390181, 46394472, 46398829, 60470625, 65701466, 67590313, 69358524, 73161121, 82119746, 141683, 14171309, 15568188, 19597516, 19598614, 19599800, 19600301, 19602161, 19602887, 19603848, 19604923, 20851030, 21851151, 22519043, 22520323, 22521117, 22526637, 22530382, 22532316, 22551067, 22813592, 26885753, 35654770, 43251641, 43309854, 43315022, 43997536, 53105553, 54902921, 59279407, 63076395, 63393239, 65555245, 72316259, 80617408, 2842088, 8103914, 8846333, 15457977, 34571461, 47853090,
52883628, 59288307, 61874563, 77455901, 246900, 12105017, 13362990, 24182200, 27741788, 36271918, 37572466, 37576135, 46122945, 47941357, 54794750, 56691536, 56922159, 638428, 1087104, 16271754, 22316879, 24644618, 32916202, 33767291, 33964665, 36276770, 40784788, 49229453, 50588766, 54451655, 57648678, 67953670, 75063568, 81666318, 82814942, 82815452, 82816262, 82818379, 82820801, 85068667, 87824710, 89272790, 89827608, 89828637, 89828843, 89833686, 89839593, 89909318, 90379779, 92081224, 92188126,
94499182, 94898977, 94900640, 94901422, 97189432, 102482583, 102505607, 110072035, 110299107, 116751235, 116752005, 116752518, 117020172, 117021108, 117022439, 117024278, 117025206, 117026131, 120211536, 120212686, 120213762, 120214591, 121220136, 124680744, 125812047, 129090775, 130272175, 130273452, 130274327, 130275175, 130276120, 130277775, 130279996, 130280828, 130557360, 130563143, 131369644, 131370950, 131371917, 131372759, 131379318, 131381593, 131382345, 131382773, 131383080, 131384052,
131384899, 131385357, 140217230, 140220210, 140220941, 140221199, 140222546, 140223648, 143088645, 143089939, 143090899, 143092256, 143093557, 143094516, 143095615, 143096049, 143096471, 143097467, 143100622, 147048575, 147244850, 147265035, 148822914, 148881546, 155196093, 155263346, 155264363, 155264600, 155311421, 155313540, 156828629, 162517272, 164117002, 166414324, 167378864, 168652434, 179739185, 190593882, 196204681, 201212171, 201212649, 201212904, 201214660, 201549405, 201550131, 201557569,
202614118, 202615372, 202617017, 202618436, 202619755, 211773628, 211774323, 211775642, 211777035, 211777803, 211778917, 215573164, 226722089, 237521664, 237522863, 5999470, 9168744, 10441917, 13167143, 18449174, 22078163, 24024377, 26438449, 28644085, 29125978, 30744371, 30746749, 31051541, 31157045, 31161653, 34688744, 47894700, 57063874, 57357556, 57358222, 57359452, 63644938, 6369258, 7201206, 7919586, 8211711, 8212413, 8213695, 8219373, 8234457, 8394768, 8395472, 8396752, 8445919, 8446630, 8446926,
8595670, 8844363, 8846670, 10014675, 10650901, 16645306, 32095836, 35890414, 44474914, 45376057, 10863371, 11210952, 11854151, 11856461, 11974160, 12135182, 12137712, 12691743, 12954428, 15153935, 15940534, 32894953, 33819339, 35885492, 36172706, 36177876, 46470113, 50086004, 50806859, 3571913, 24705150, 25467329, 29797535, 33548104, 40252108, 41532178, 43229297, 68658876, 68670346, 73054641, 82655448, 89588896, 90269606, 90774881, 91519650, 93705478, 96475263, 96617015, 106894020, 106895182, 106896125,
106898662, 106899754, 106901800, 106903189, 119947199, 120721859, 122688558, 125982520, 127005358, 128988980, 137095969, 142662233, 152919605, 153658705, 160947474, 166159727, 166160261, 166161632, 166226564, 166232407, 166232971, 166474024, 171534314, 177010777, 192880588, 5404509, 12640143, 14506100, 17061825, 18949311, 22502174, 25717757, 25718276, 25719399, 27730252, 30884525, 32280110, 41023065, 47772101, 49136057, 49141053, 49246356, 49548608, 49631232, 49708087, 51793953, 55327980, 64606370,
64606842, 64607396, 64607977, 64608938, 64609812, 64611177, 66065194, 68050142, 68572334, 78008403, 83383283, 89731704, 92701788, 107501925, 112372590, 116296653, 116297166, 116297660, 116299004, 128081281, 140929568, 143017908, 143347974, 144379498, 155076907, 155452734, 155453929, 155454408, 155455567, 155457625, 155459548, 155460172, 155461094, 155462079, 155463702, 155464896, 155465581, 157628392, 160044430, 161449478, 161788292, 162421208, 172036715, 179069260, 183489244, 189844496, 12285, 12953,
5395564, 5396183, 5396676, 8619084, 8619928, 8620708, 8621954, 8622355, 32927395, 37164287, 45913364, 60761359, 66253510, 73775721, 79089861, 80649842, 94567276, 97678634, 98409948, 98410701, 99813006, 100045806, 106553188, 111488865, 119127219, 121030821, 122338659, 123760112, 123760720, 134923134, 136533607, 137305007, 152198766, 160600366, 163146854, 163959712, 164673915, 166530242, 170635390, 1705931, 3943770, 29454055, 32706021, 43490987, 54899049, 58554347, 61278528, 61573961, 72747982, 72799170,
76708391, 88555203, 91726617, 94446938, 96941572, 104699856, 114377335, 122764825, 126478330, 127735331, 132799555, 133150493, 133930810, 138133083, 143077648, 153666230, 153667364, 153668188, 153669026, 156547730, 163638069, 18021727, 22748472, 33749121, 36228568, 37387571, 45251809, 55369050, 57167689, 57168473, 57169047, 57169551, 57170308, 57170676, 57171503, 57173799, 57174855, 57185616, 57186106, 57187288, 57188306, 57189117, 57190950, 57191619, 57192133, 57193490, 57193975, 57194830, 57196303,
57198264, 57879606, 58166364, 63094674, 64104134, 64105295, 64106628, 64108330, 64110008, 64111377, 64111958, 67627831, 68097608, 68736348, 69331806, 69332038, 69333014, 69333598, 72088576, 83100027, 83469985, 83855081, 95851250, 104989517, 112372485, 112374725, 117263553, 117264232, 130116679, 141173001, 141801917, 141802902, 141804075, 141804815, 142665100, 143187484, 145997160, 150131844, 153968599, 159294464, 13353293, 16056864, 18849122, 20551163, 32805709, 33010515, 33011360, 33014511, 33015021,
36277447, 36278273, 36278836, 40070432, 43237632, 43937901, 46827306, 46828299, 46830196, 46837582, 46838102, 50758260, 56736734, 61303080, 67580690, 67581589, 67582179, 67585217, 67585788, 67587283, 69102852, 72985529, 74828645, 76201593, 76645408, 97907909, 99495690, 102774316, 103082926, 103083705, 103084731, 103085324, 103086860, 108533902, 110933151, 110934511, 111248937, 120224205, 127053877, 127968654, 133615762, 133755391, 5091132, 5091963, 5093064, 5094193, 5094932, 5096207, 5097189, 5098135,
5099353, 5100045, 5108064, 5109194, 5110031, 9896971, 15866613, 18336472, 31498261, 33656534, 33658347, 34998989, 36466193, 43153722, 64045551, 64047856, 65048154, 68251003, 72788175, 78741396, 78742156, 78743200, 78744109, 78810722, 79804551, 80564644, 80565479, 81747642, 82427690, 92108966, 92539107, 95876957, 117109915, 122505688, 129878700, 134164479, 134170820, 4059513, 5168679, 5169734, 15727703, 17116415, 24056084, 24375346, 33762402, 55178597, 55179435, 55181197, 55183052, 58061544, 62841380,
62842258, 70119465, 70127234, 77501935, 78561722, 84403780, 100027095, 102010330, 102011532, 102772406, 102785905, 102798002, 102802748, 102809396, 104409870, 106239695, 111416894, 126471559, 126728885, 126729327, 126729838, 126730717, 126731625, 129983339, 133041872, 135292294, 143430214, 143431145, 143431717, 143432411, 143433511, 143543637, 146995843, 4344758, 9141871, 10203381, 10316750, 10594584, 10663670, 10744418, 11290798, 11493054, 11671015, 11721529, 56694633), width = c(6201, 484, 404,
417, 380, 380, 380, 465, 380, 381, 380, 380, 527, 382, 380, 380, 403, 384, 203, 590, 3453479, 18452, 92262, 380, 380, 380, 382, 443, 380, 382, 547, 469, 381, 200, 380, 399, 381, 381, 380, 380, 200, 380, 1213, 380, 763, 380, 384, 469, 853, 388, 1742, 1222, 382, 508, 201, 619, 1028, 380, 391, 383, 380, 1141, 386, 541, 966, 115200, 185300, 2916265, 38179, 380, 382, 382, 2198, 380, 380, 380, 200, 383, 385, 706, 382, 432, 380, 380, 412, 385, 829, 382, 200, 380, 380, 150, 380, 2793, 380, 200, 381, 380,
452, 389354, 3342701, 388, 380, 492, 200, 447, 385, 384, 893, 380, 380, 380, 513, 510, 567, 383, 200, 1804, 1137, 389, 380, 432, 382, 558, 808, 380, 387, 19, 380, 438, 380, 382, 380, 2764692, 380, 441, 380, 525, 382, 392, 200, 380, 380, 573, 393, 479, 1945100, 200, 380, 380, 451, 389, 380, 469, 408, 380, 384, 380, 750, 385, 200, 386, 202, 1249, 367, 381, 380, 616, 2173060, 385, 1345, 380, 380, 380, 381, 380, 380, 380, 552, 563, 831, 467, 1008, 461, 412, 380, 2780144, 380, 380, 382, 385, 872, 380,
380, 380, 705, 398, 296, 392, 380, 1116, 381, 392, 513, 534, 815, 364, 639, 380, 200, 385, 239, 383, 381, 449, 58, 157273, 26370, 99, 43, 2302535, 4917, 419, 115, 431, 608, 617, 2819, 382, 381, 380, 467, 380, 380, 380, 380, 380, 470, 919, 411, 1764, 385, 709, 200, 666, 380, 140910, 1107, 703, 5291, 3516, 1775, 625, 380, 3903079, 43, 413, 123, 195, 1470, 422, 1013, 381, 381, 383, 200, 380, 380, 395, 447, 200, 381, 5407756, 380, 528, 380, 380, 398, 380, 553, 383, 380, 3075343, 81, 231, 381, 382, 380,
70, 380, 201, 443, 381, 381, 381, 380, 380, 431, 413, 381, 380, 491, 606, 801, 380, 380, 380, 427, 532, 380, 785, 386, 370, 205, 380, 821, 344, 99, 74, 100, 108, 117, 472, 22678, 175, 2105338, 71775, 669, 201, 387, 382, 380, 381, 400, 240, 380, 444, 380, 381, 1045, 1600, 816, 465, 382, 529, 384, 382, 780, 380, 439, 502, 380, 441, 530, 666, 570, 381, 953, 734, 613, 248, 254, 282, 613, 445, 379, 1027, 381, 384, 202, 937, 570, 383, 438, 815, 631, 200, 1171, 1078, 650, 398, 420, 764, 391, 385, 484, 380,
380, 866, 515, 384, 381, 380, 398, 382, 487, 381, 968, 200, 383, 575, 383, 380, 380, 381, 456, 381, 380, 505, 381, 380, 442, 206, 483, 381, 380, 383, 380, 410, 386, 382, 384, 380, 531, 870, 1071, 383, 467, 646, 381, 508, 1112, 790, 380, 402, 380, 392, 383, 380, 381, 2116114, 371489, 168662, 569, 493, 55369, 2072, 61679, 296, 1410, 406, 579, 1207, 1521, 381, 3085, 126680, 106, 182, 158, 293, 958, 112, 135, 120, 260, 162, 100, 145, 99, 493, 713, 520, 2314900, 380, 380, 383, 388, 461, 78, 4538, 493,
713, 177, 713, 493, 2355, 2103068, 57568, 145195, 393, 200, 407, 380, 382, 381, 526, 1366, 380, 380, 394, 380, 380, 1809, 379, 437, 592, 389, 380, 380, 643, 452584, 474715, 2137875, 94542, 381, 1666, 422, 387, 380, 361, 369, 942, 417, 380, 752, 381, 381, 388, 380, 380, 380, 391, 383, 654, 382, 384, 456, 382, 480, 385, 200, 387, 380, 380, 389, 673, 368, 389, 381, 380, 380, 380, 228, 496, 382, 380, 384, 444, 46, 95, 493, 493, 26894, 2035863, 23297, 483, 383, 519, 394, 825, 389, 1065, 441, 438, 380,
441, 480, 380, 460, 513, 380, 380, 388, 380, 1067, 1413, 676, 380, 200, 381, 380, 382, 202, 385, 1040, 200, 384, 200, 382, 596, 1378, 1138, 410, 1044, 383, 386, 380, 380, 514, 380, 380, 380, 81, 239, 409, 380, 434, 382, 381, 380, 485, 380, 399, 382, 387, 4352056, 818, 380, 392, 380, 2707, 3823, 383, 380, 557, 383, 9420, 502, 380, 384, 625, 384, 511, 1199, 5559, 380, 381, 380, 380, 381, 380, 374, 400, 385, 374, 380, 381, 830, 384, 200, 1276232, 242579, 849, 380, 380, 380, 389, 747, 433, 380, 200,
200, 380, 380, 380, 385, 389, 381, 380, 384, 389, 381, 566, 394, 389, 380, 380, 383, 380, 441, 380, 481, 380, 383, 380, 384, 382, 216, 735, 620, 383, 385, 380, 484, 746, 567, 614, 383, 200, 728, 383, 727, 381, 1188, 381, 152899, 4828961, 384, 380, 1121, 383, 469, 700, 428, 892, 383, 383, 464, 200, 401, 380, 570, 380, 380, 200, 384, 380, 380, 381, 226, 632, 383, 380, 384, 535, 465, 200, 693, 2747, 380, 449, 386, 381, 383, 387, 200, 450, 392, 384, 380, 2712, 385, 833, 614, 519, 799, 436, 4759, 2031700,
609, 1663, 1027, 380, 383, 380, 200, 381, 804, 384, 390, 477, 388, 640, 383, 395, 381, 727, 393, 371, 444, 380, 454, 695, 380, 483, 383, 380, 383, 500, 380, 380, 381, 381, 381, 466, 380, 1051, 1060, 505, 885, 610, 702, 382, 200, 383, 529, 793, 381, 380, 380, 383, 380, 1783, 953, 486, 380, 2371440, 493, 567, 31471, 69, 381, 380, 814, 431, 384, 392, 383, 442, 463, 380, 382, 382, 657, 382, 382, 380, 382, 876, 241, 200, 554, 913, 387, 380, 387, 200, 380, 693, 1025, 1594, 1061, 4760173, 386, 382, 381,
387, 380, 200, 389, 381, 383, 384, 386, 383, 385, 414, 393, 380, 200, 401, 1893, 388, 383, 380, 390, 405, 200, 380, 200, 624, 393, 503, 802, 646, 387, 382, 122, 125, 63552, 227697, 32255, 47, 177080, 43481, 99797, 32, 27944, 195111), NAMES = NULL, elementType = "ANY", elementMetadata = NULL, metadata = list()), strand = new("Rle", values = 3, lengths = 910, elementMetadata = NULL, metadata = list()), seqinfo = new("Seqinfo", seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15",
"chr16", "chr17", "chr18", "chr19", "chr2", "chr20", "chr21", "chr22", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chrX", "chrY"), seqlengths = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), genome = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), elementMetadata = new("DFrame",
rownames = NULL, nrows = 910, elementType = "ANY", elementMetadata = NULL, metadata = list(), listData = list()), elementType = "ANY", metadata = list())'),
genome_build_output = 'hg38',
blacklist = NULL,
picard_files = list(),
reference = NULL,
upset_plot = FALSE,
stat_plot = TRUE,
chromHMM_plot = FALSE,
chromHMM_annotation = 'K562',
chipseeker_plot = TRUE,
enrichment_plot = TRUE,
tss_plot = FALSE,
tss_distance = c(-3000,3000),
precision_recall_plot = FALSE,
n_threshold = 20,
corr_plot = FALSE,
interact = TRUE,
save_output = FALSE,
output_dir = './',
workers = 1,
error = FALSE)
BRGenomics::tidyChromosomes().
ℹ️ Note: All EpiCompare
analyses are conducted on the peak files after they have been
filtered (i.e. blacklisted regions and non-standard chromosomes
removed) and lifted (as needed).
# Per-sample peak summary table, plus a CSV download button.
# NOTE(review): `peaklist` has already been passed through tidy_peakfile()
# with the blacklist in the setup code - confirm peak_info() still receives
# the counts it needs for the "before tidy" column.
peak_info_df <- peak_info(
  peaklist = peaklist,
  blacklist = blacklist
)
download_button(
  object = peak_info_df,
  filename = "peak_info_df",
  output_extension = ".csv",
  add_download_button = params$add_download_button
)
NULL
# Print table
knitr::kable(peak_info_df, format = "markdown")
| PeakN Before Tidy | Blacklisted Peaks Removed (%) | Non-standard Peaks Removed (%) | PeakN After Tidy | |
|---|---|---|---|---|
| idr | 2441 | 0 | 0 | 2441 |
| mspc | 33687 | 0 | 0 | 33687 |
# Hand the table to save_output(), then drop it from the workspace.
save_output(
  save_output = params$save_output,
  file = peak_info_df,
  file_type = "data.frame",
  filename = "peak_info",
  outpath = outfile_dir
)
remove(peak_info_df)
Metrics on fragments are shown only if a Picard summary is provided. See the manual for help.
# Fragment metrics from Picard summaries; skipped when none were supplied.
fragment_info_df <- NULL
# length() is 0 for both NULL and an empty list(), so an empty `picard_files`
# no longer triggers fragment_info().
if (length(params$picard_files) > 0) {
  fragment_info_df <- fragment_info(picard_list = params$picard_files)
  download_button(
    object = fragment_info_df,
    filename = "fragment_info",
    output_extension = ".csv",
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = fragment_info_df,
    file_type = "data.frame",
    filename = "fragment_info",
    outpath = outfile_dir
  )
}
# The table must be the value of a top-level expression to be rendered:
# a value produced in the middle of a braced block is never auto-printed.
if (!is.null(fragment_info_df)) {
  knitr::kable(fragment_info_df, format = "markdown")
}
remove(fragment_info_df)
Distribution of peak widths in samples
# Boxplot of peak widths for every entry of `peaklist_tidy`, with a
# download button for the plot object.
width_plot <- width_boxplot(
  peaklist = peaklist_tidy,
  interact = params$interact
)
download_button(
  object = width_plot,
  filename = "width_boxplot",
  self_contained = params$interact,
  add_download_button = params$add_download_button
)
NULL
width_plot$plot
# Hand the boxplot to save_output(), then drop the plot object.
save_output(
  save_output = params$save_output,
  file = width_plot$plot,
  file_type = "ggplot",
  filename = "width_plot",
  outpath = outfile_dir,
  interactive = params$interact
)
remove(width_plot)
Percentage of overlapping peaks between samples. Hover over heatmap for percentage values.
The heatmap can be interpreted as follows:
# Heatmap of pairwise peak overlap between the entries of `peaklist_tidy`.
# The result variable shares its name with the function; it is removed
# again after saving.
overlap_heatmap <- overlap_heatmap(
  peaklist = peaklist_tidy,
  interact = params$interact
)
download_button(
  object = overlap_heatmap,
  filename = "overlap_heatmap",
  self_contained = params$interact,
  add_download_button = params$add_download_button
)
NULL
overlap_heatmap$plot
# Hand the heatmap to save_output(), then delete the result variable.
save_output(
  save_output = params$save_output,
  file = overlap_heatmap$plot,
  file_type = "ggplot",
  filename = "samples_percent_overlap",
  outpath = outfile_dir,
  interactive = params$interact
)
remove(overlap_heatmap)
Upset plot of overlapping peaks between samples. See here on how to interpret the plot.
# Upset plot of overlapping peaks; stays NULL unless `params$upset_plot`
# is exactly TRUE.
upset_plot <- NULL
if (isTRUE(params$upset_plot)) {
  upset_plot <- overlap_upset_plot(peaklist = peaklist_tidy)
  download_button(
    object = upset_plot,
    filename = "upset_plot",
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = upset_plot,
    file_type = "image",
    filename = "upset_plot",
    outpath = outfile_dir
  )
}
# Top-level value so that the plot (or NULL) is printed.
upset_plot
NULL
remove(upset_plot)
Depending on the format of the reference file,
EpiCompare produces different plots:
MACS2):
EpiCompare generates paired boxplot per sample showing the
distribution of -log10(q-value) of reference peaks that are
overlapping and non-overlapping with the sample dataset.
EpiCompare generates a barplot of percentage of overlapping
sample peaks with the reference. Statistical significance (adjusted
p-value) is written above each bar.
Reference peakfile:
# Overlap statistics plot (needs a reference); stays NULL when skipped.
stat_plot <- NULL
if (needs_ref(params$stat_plot, reference)) {
  stat_plot <- overlap_stat_plot(
    reference = reference_tidy,
    peaklist = peaklist,
    txdb = txdb,
    interact = params$interact
  )
  download_button(
    object = stat_plot,
    filename = "overlap_stat_plot",
    button_label = "Download overlap stat plot",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  # Save output
  save_output(
    save_output = params$save_output,
    file = stat_plot$plot,
    file_type = "ggplot",
    filename = "stat_plot",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# The plot must be the value of a top-level expression to be rendered:
# a bare `stat_plot$plot` in the middle of the braced block above was
# evaluated but never printed.
if (!is.null(stat_plot)) {
  stat_plot$plot
}
# Remove variables
remove(stat_plot)
NOTE: This plot is not generated when reference=NULL.
The first plot shows the balance between precision and recall across multiple peak calling stringency thresholds.
The second plot shows F1 score (a score that combines precision and recall) across the different peak calling stringency thresholds.
F1 score = 2*(precision*recall) / (precision+recall)
pr_out <- NULL
# Precision-recall analysis of the samples against the reference.
if (needs_ref(params$precision_recall_plot, reference)) {
  #### Create save path ####
  # No path is passed only when `save_output` is exactly FALSE.
  save_path <- if (!isFALSE(params$save_output)) {
    file.path(outfile_dir, "precision_recall.csv")
  } else {
    NULL
  }
  pr_out <- plot_precision_recall(
    peakfiles = peaklist,
    reference = reference_tidy,
    n_threshold = params$n_threshold,
    workers = params$workers,
    show_plot = FALSE,
    verbose = FALSE,
    save_path = save_path,
    interact = params$interact
  )
  download_button(
    object = pr_out,
    filename = "precision_recall",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
}
# Top-level value so that the plot (or NULL) is printed.
pr_out$precision_recall_plot
NULL
cat("\n\n")
pr_out$f1_plot
NULL
remove(pr_out)
The correlation plot shows the correlation between the quantiles when the genome is binned at a set size. These quantiles are based on the intensity of the peak, dependent on the peak caller used (q-value for MACS2):
# Correlation plot; stays NULL unless `params$corr_plot` is exactly TRUE.
cp_out <- NULL
if (isTRUE(params$corr_plot)) {
  #### Create save path ####
  # No path is passed only when `save_output` is exactly FALSE.
  save_path <- if (!isFALSE(params$save_output)) {
    file.path(outfile_dir, "corr.csv.gz")
  } else {
    NULL
  }
  cp_out <- plot_corr(
    peakfiles = peaklist_tidy,
    # reference can be NULL
    reference = reference_tidy,
    genome_build = output_build,
    bin_size = params$bin_size,
    workers = params$workers,
    show_plot = FALSE,
    save_path = save_path,
    interact = params$interact
  )
  download_button(
    object = cp_out,
    filename = "correlation_plot",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
}
# Top-level value so that the plot (or NULL) is printed.
cp_out$corr_plot
NULL
remove(cp_out)
ChromHMM
annotates and characterises peaks into different chromatin states.
ChromHMM annotations used in EpiCompare
were obtained from here.
ChromHMM annotation definitions:
For more information on ChromHMM states, please see here
ChromHMM annotation of individual samples.
# ChromHMM annotation of every entry in `peaklist_tidy`; stays NULL unless
# `params$chromHMM_plot` is exactly TRUE.
samples_chromHMM <- NULL
if (isTRUE(params$chromHMM_plot)) {
  # Annotation for the requested cell line; `chromHMM_list` is reused by
  # the later reference-based ChromHMM chunks.
  chromHMM_list <- get_chromHMM_annotation(
    cell_line = params$chromHMM_annotation
  )
  samples_chromHMM <- plot_chromHMM(
    peaklist = peaklist_tidy,
    chromHMM_annotation = chromHMM_list,
    genome_build = output_build,
    interact = params$interact
  )
  download_button(
    object = samples_chromHMM,
    filename = "samples_ChromHMM",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = samples_chromHMM,
    file_type = "ggplot",
    filename = "samples_ChromHMM",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Top-level value so that the plot (or NULL) is printed.
samples_chromHMM
NULL
remove(samples_chromHMM)
Percentage of Sample peaks found in Reference peaks (Reference peakfile: )
# Table: percentage of sample peaks that overlap the reference.
if (needs_ref(params$chromHMM_plot, reference)) {
  sample_in_ref_df <- overlap_percent(
    peaklist1 = peaklist_tidy,
    peaklist2 = reference_tidy,
    invert = FALSE
  )
  download_button(
    object = sample_in_ref_df,
    filename = "sample_in_ref_df",
    output_extension = ".csv",
    add_download_button = params$add_download_button
  )
  # Last expression of the block, so the table becomes the chunk's value.
  knitr::kable(sample_in_ref_df, format = "markdown")
}
ChromHMM annotation of sample peaks found in reference peaks.
# ChromHMM annotation of sample peaks that overlap the first reference.
sample_in_ref_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)) {
  # Obtain overlapping peaks.
  # lapply() always returns a list with the names of `peaklist_tidy`;
  # mapply() with its default SIMPLIFY = TRUE may try to collapse the result
  # when every element happens to have the same length.
  sample_in_ref_list <- lapply(peaklist_tidy, function(file) {
    IRanges::subsetByOverlaps(
      x = file,
      ranges = reference_tidy[[1]]
    )
  })
  # Run ChromHMM (uses `chromHMM_list` created in the samples ChromHMM chunk)
  sample_in_ref_chromHMM <- plot_chromHMM(
    peaklist = sample_in_ref_list,
    chromHMM_annotation = chromHMM_list,
    genome_build = output_build,
    interact = params$interact
  )
  download_button(
    object = sample_in_ref_chromHMM,
    filename = "sample_in_ref_chromHMM",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = sample_in_ref_chromHMM,
    file_type = "ggplot",
    filename = "sample_in_ref_ChromHMM",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Top-level value so that the plot (or NULL) is printed.
sample_in_ref_chromHMM
NULL
remove(sample_in_ref_chromHMM)
Percentage of Reference peaks found in Sample peaks (Reference peakfile: )
# Table: percentage of reference peaks that overlap each sample.
if (needs_ref(params$chromHMM_plot, reference)) {
  ref_in_sample_df <- overlap_percent(
    peaklist1 = reference_tidy,
    peaklist2 = peaklist_tidy,
    invert = FALSE
  )
  download_button(
    object = ref_in_sample_df,
    filename = "ref_in_sample_df",
    output_extension = ".csv",
    add_download_button = params$add_download_button
  )
  # Last expression of the block, so the table becomes the chunk's value.
  knitr::kable(ref_in_sample_df, format = "markdown")
}
ChromHMM annotation of reference peaks found in sample peaks.
# ChromHMM annotation of first-reference peaks that overlap each sample.
ref_in_sample_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)) {
  # Subset overlapping peaks.
  # lapply() always returns a list with the names of `peaklist_tidy`;
  # mapply() with its default SIMPLIFY = TRUE may try to collapse the result
  # when every element happens to have the same length.
  ref_in_sample_list <- lapply(peaklist_tidy, function(file) {
    IRanges::subsetByOverlaps(
      x = reference_tidy[[1]],
      ranges = file
    )
  })
  # Plot ChromHMM (uses `chromHMM_list` created in the samples ChromHMM chunk)
  ref_in_sample_chromHMM <- plot_chromHMM(
    peaklist = ref_in_sample_list,
    chromHMM_annotation = chromHMM_list,
    genome_build = output_build,
    interact = params$interact
  )
  download_button(
    object = ref_in_sample_chromHMM,
    filename = "ref_in_sample_chromHMM",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = ref_in_sample_chromHMM,
    file_type = "ggplot",
    filename = "ref_in_sample_ChromHMM",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Top-level value so that the plot (or NULL) is printed.
ref_in_sample_chromHMM
NULL
remove(ref_in_sample_chromHMM)
Percentage of sample peaks not found in reference peaks (Reference peakfile: )
# Table: percentage of sample peaks that do not overlap the reference
# (`invert = TRUE`).
if (needs_ref(params$chromHMM_plot, reference)) {
  sample_not_in_ref_df <- overlap_percent(
    peaklist1 = peaklist_tidy,
    peaklist2 = reference_tidy,
    invert = TRUE
  )
  download_button(
    object = sample_not_in_ref_df,
    filename = "sample_not_in_ref_df",
    output_extension = ".csv",
    add_download_button = params$add_download_button
  )
  # Last expression of the block, so the table becomes the chunk's value.
  knitr::kable(sample_not_in_ref_df, format = "markdown")
}
ChromHMM annotation of sample peaks not found in reference peaks.
# ChromHMM annotation of sample peaks that do not overlap the first reference.
sample_not_in_ref_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)) {
  # Keep only non-overlapping peaks (`invert = TRUE`).
  # lapply() always returns a list with the names of `peaklist_tidy`;
  # mapply() with its default SIMPLIFY = TRUE may try to collapse the result
  # when every element happens to have the same length.
  sample_not_in_ref_list <- lapply(peaklist_tidy, function(file) {
    IRanges::subsetByOverlaps(
      x = file,
      ranges = reference_tidy[[1]],
      invert = TRUE
    )
  })
  # Run ChromHMM (uses `chromHMM_list` created in the samples ChromHMM chunk)
  sample_not_in_ref_chromHMM <- plot_chromHMM(
    peaklist = sample_not_in_ref_list,
    chromHMM_annotation = chromHMM_list,
    genome_build = output_build,
    interact = params$interact
  )
  download_button(
    object = sample_not_in_ref_chromHMM,
    filename = "sample_not_in_ref_chromHMM",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = sample_not_in_ref_chromHMM,
    file_type = "ggplot",
    filename = "sample_not_in_ref_ChromHMM",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Top-level value so that the plot (or NULL) is printed.
sample_not_in_ref_chromHMM
NULL
remove(sample_not_in_ref_chromHMM)
Percentage of reference peaks not found in sample peaks (Reference peakfile: )
# Table: percentage of reference peaks that do not overlap each sample
# (`invert = TRUE`).
if (needs_ref(params$chromHMM_plot, reference)) {
  ref_not_in_sample_df <- overlap_percent(
    peaklist1 = reference_tidy,
    peaklist2 = peaklist_tidy,
    invert = TRUE
  )
  download_button(
    object = ref_not_in_sample_df,
    filename = "ref_not_in_sample_df",
    output_extension = ".csv",
    add_download_button = params$add_download_button
  )
  # Last expression of the block, so the table becomes the chunk's value.
  knitr::kable(ref_not_in_sample_df, format = "markdown")
}
ChromHMM annotation of reference peaks not found in sample peaks.
# ChromHMM annotation of first-reference peaks that overlap no sample peak.
ref_not_in_sample_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)) {
  # Subset unique peaks (`invert = TRUE`).
  # lapply() always returns a list with the names of `peaklist_tidy`;
  # mapply() with its default SIMPLIFY = TRUE may try to collapse the result
  # when every element happens to have the same length.
  ref_not_in_sample_list <- lapply(peaklist_tidy, function(file) {
    IRanges::subsetByOverlaps(
      x = reference_tidy[[1]],
      ranges = file,
      invert = TRUE
    )
  })
  # Run ChromHMM (uses `chromHMM_list` created in the samples ChromHMM chunk)
  ref_not_in_sample_chromHMM <- plot_chromHMM(
    peaklist = ref_not_in_sample_list,
    chromHMM_annotation = chromHMM_list,
    genome_build = output_build,
    interact = params$interact
  )
  download_button(
    object = ref_not_in_sample_chromHMM,
    filename = "ref_not_in_sample_chromHMM",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  save_output(
    save_output = params$save_output,
    file = ref_not_in_sample_chromHMM,
    file_type = "ggplot",
    filename = "ref_not_in_sample_ChromHMM",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Top-level value so that the plot (or NULL) is printed.
ref_not_in_sample_chromHMM
NULL
remove(ref_not_in_sample_chromHMM)
EpiCompare uses ChIPseeker::annotatePeak()
to annotate peaks with the nearest gene and genomic regions where the
peak is located. The peaks are annotated with genes taken from human
genome annotations (hg19 or hg38) distributed by Bioconductor.
# ChIPseeker annotation plot; remains NULL unless params$chipseeker_plot
# is TRUE, so the print below shows nothing when the section is disabled.
chipseeker_plot <- NULL
if (isTRUE(params$chipseeker_plot)) {
  # Annotate all tidied peak files against the selected TxDb
  chipseeker_plot <- plot_ChIPseeker_annotation(
    peaklist = peaklist_tidy,
    txdb = txdb,
    tss_distance = params$tss_distance,
    interact = params$interact
  )
  # Download button for the plot object
  download_button(
    object = chipseeker_plot,
    filename = "chipseeker_plot",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  # Write the plot to outfile_dir when params$save_output is set
  save_output(
    save_output = params$save_output,
    file = chipseeker_plot,
    file_type = "ggplot",
    filename = "chipseeker_annotation",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Display the plot, then drop it from the environment
chipseeker_plot
remove(chipseeker_plot)
EpiCompare performs KEGG pathway and GO enrichment
analysis using clusterProfiler.
ChIPseeker::annotatePeak() is first used to assign peaks to
nearest genes. Biological themes amongst the genes are identified using
ontologies (KEGG and GO). The peaks are annotated with genes taken from
annotations of human genome (hg19 or hg38) provided by Bioconductor.
# KEGG / GO enrichment plots; remains NULL unless params$enrichment_plot
# is TRUE.
enrichment_plots <- NULL
if (isTRUE(params$enrichment_plot)) {
  enrichment_plots <- plot_enrichment(
    peaklist = peaklist_tidy,
    txdb = txdb,
    tss_distance = params$tss_distance,
    interact = params$interact
  )
  # Figure height: driven by whichever of the two plots lists more
  # distinct term descriptions
  max_terms <- max(vapply(
    list(enrichment_plots$kegg_plot, enrichment_plots$go_plot),
    function(plt) length(unique(plt$data$Description)),
    integer(1)
  ))
  fig_height <- fig_length(
    default_size = 10,
    number_of_items = max_terms,
    max_items = 20
  )
}
Kyoto Encyclopedia of Genes and Genomes (KEGG) enrichment results.
# KEGG results: download button and optional file output, produced only
# when the enrichment section is enabled.
if (isTRUE(params$enrichment_plot)) {
  download_button(
    object = enrichment_plots$kegg_plot,
    filename = "KEGG_plot",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  # Write the plot to outfile_dir when params$save_output is set
  save_output(
    save_output = params$save_output,
    file = enrichment_plots$kegg_plot,
    file_type = "ggplot",
    filename = "KEGG_analysis",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Display the KEGG plot (NULL when enrichment_plots is NULL)
enrichment_plots$kegg_plot
Gene Ontology (GO) enrichment results.
GeneRatio definition:
# GO results: download button and optional file output, produced only
# when the enrichment section is enabled.
if (isTRUE(params$enrichment_plot)) {
  download_button(
    object = enrichment_plots$go_plot,
    filename = "GO_plot",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
  # Write the plot to outfile_dir when params$save_output is set
  save_output(
    save_output = params$save_output,
    file = enrichment_plots$go_plot,
    file_type = "ggplot",
    filename = "GO_analysis",
    outpath = outfile_dir,
    interactive = params$interact
  )
}
# Display the GO plot (NULL when enrichment_plots is NULL), then drop the
# enrichment results from the environment
enrichment_plots$go_plot
remove(enrichment_plots)
This plot shows peaks that map to transcription start sites (TSS). TSS regions are defined as the sequences flanking each TSS.
By default, this function plots the frequency of peaks upstream
(default: -3000bp) and downstream (default: +3000bp) of TSS.
These ranges can be adjusted with the argument
EpiCompare(tss_distance=).
The grey area around the main frequency line represents the 95% confidence interval estimated by bootstrapping.
# Peak frequency around TSS; remains NULL unless params$tss_plot is TRUE.
tssplt <- NULL
if (isTRUE(params$tss_plot)) {
  tssplt <- tss_plot(
    peaklist = peaklist_tidy,
    txdb = txdb,
    tss_distance = params$tss_distance,
    workers = params$workers,
    interact = params$interact
  )
  # Download button for the plot object
  download_button(
    object = tssplt,
    filename = "tss_plots",
    self_contained = params$interact,
    add_download_button = params$add_download_button
  )
}
# Display the plot (NULL when the section was skipped)
tssplt
NULL
# NOTE(review): `p` is not assigned anywhere in this chunk; presumably it is
# created earlier in the report. Confirm it exists, since remove() warns
# about objects it cannot find.
remove(tssplt, p)
If you use EpiCompare, please cite:
# Print the plain-text form of the package citation
cat(utils::citation(package = "EpiCompare")$textVersion)
EpiCompare: R package for the comparison and quality control of epigenomic peak files (2022) Sera Choi, Brian M. Schilder, Leyla Abbasova, Alan E. Murphy, Nathan G. Skene, bioRxiv, 2022.07.22.501149; doi: https://doi.org/10.1101/2022.07.22.501149
# Record the R version, platform and package versions used for this report
utils::sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.2.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] EpiCompare_1.6.4 ConsensusPeak_0.0.0.9000 testthat_3.2.1
## [4] devtools_2.4.5 usethis_2.2.2
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.3 matrixStats_1.2.0
## [3] bitops_1.0-7 enrichplot_1.22.0
## [5] lubridate_1.9.3 webshot_0.5.5
## [7] HDO.db_0.99.1 httr_1.4.7
## [9] RColorBrewer_1.1-3 profvis_0.3.8
## [11] tools_4.3.2 utf8_1.2.4
## [13] R6_2.5.1 lazyeval_0.2.2
## [15] urlchecker_1.0.1 withr_3.0.0
## [17] prettyunits_1.2.0 gridExtra_2.3
## [19] cli_3.6.2 Biobase_2.62.0
## [21] formatR_1.14 TSP_1.2-4
## [23] scatterpie_0.2.1 labeling_0.4.3
## [25] sass_0.4.8 readr_2.1.5
## [27] bsplus_0.1.4 Rsamtools_2.18.0
## [29] yulab.utils_0.1.4 gson_0.1.0
## [31] DOSE_3.28.2 sessioninfo_1.2.2
## [33] plotrix_3.8-4 BSgenome_1.70.1
## [35] rstudioapi_0.15.0 impute_1.76.0
## [37] RSQLite_2.3.5 generics_0.1.3
## [39] gridGraphics_0.5-1 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [41] BiocIO_1.12.0 crosstalk_1.2.1
## [43] gtools_3.9.5 dendextend_1.17.1
## [45] dplyr_1.1.4 GO.db_3.18.0
## [47] Matrix_1.6-5 futile.logger_1.4.3
## [49] fansi_1.0.6 S4Vectors_0.40.2
## [51] abind_1.4-5 lifecycle_1.0.4
## [53] yaml_2.3.8 SummarizedExperiment_1.32.0
## [55] gplots_3.1.3.1 qvalue_2.34.0
## [57] SparseArray_1.2.3 BiocFileCache_2.10.1
## [59] grid_4.3.2 blob_1.2.4
## [61] promises_1.2.1 ExperimentHub_2.10.0
## [63] crayon_1.5.2 dir.expiry_1.10.0
## [65] miniUI_0.1.1.1 lattice_0.22-5
## [67] cowplot_1.1.3 GenomicFeatures_1.54.3
## [69] KEGGREST_1.42.0 pillar_1.9.0
## [71] knitr_1.45 fgsea_1.28.0
## [73] GenomicRanges_1.54.1 rjson_0.2.21
## [75] boot_1.3-28.1 codetools_0.2-19
## [77] fastmatch_1.1-4 glue_1.7.0
## [79] ggfun_0.1.4 data.table_1.15.0
## [81] remotes_2.4.2.1 idr_1.3
## [83] vctrs_0.6.5 png_0.1-8
## [85] treeio_1.26.0 gtable_0.3.4
## [87] assertthat_0.2.1 cachem_1.0.8
## [89] xfun_0.41 S4Arrays_1.2.0
## [91] mime_0.12 tidygraph_1.3.1
## [93] seriation_1.5.4 iterators_1.0.14
## [95] interactiveDisplayBase_1.40.0 ellipsis_0.3.2
## [97] nlme_3.1-164 ggtree_3.10.0
## [99] bit64_4.0.5 progress_1.2.3
## [101] filelock_1.0.3 GenomeInfoDb_1.38.5
## [103] rprojroot_2.0.4 bslib_0.6.1
## [105] KernSmooth_2.23-22 colorspace_2.1-0
## [107] BiocGenerics_0.48.1 DBI_1.2.0
## [109] seqPattern_1.34.0 DESeq2_1.42.0
## [111] tidyselect_1.2.0 processx_3.8.3
## [113] bit_4.0.5 compiler_4.3.2
## [115] curl_5.2.0 basilisk.utils_1.14.1
## [117] genomation_1.34.0 xml2_1.3.6
## [119] plotly_4.10.4 desc_1.4.3
## [121] DelayedArray_0.28.0 shadowtext_0.1.3
## [123] rtracklayer_1.62.0 scales_1.3.0
## [125] caTools_1.18.2 ChIPseeker_1.38.0
## [127] rappdirs_0.3.3 BRGenomics_1.13.0
## [129] stringr_1.5.1 digest_0.6.34
## [131] rmarkdown_2.25 ca_0.71.1
## [133] basilisk_1.14.3 XVector_0.42.0
## [135] base64enc_0.1-3 htmltools_0.5.7
## [137] pkgconfig_2.0.3 MatrixGenerics_1.14.0
## [139] dbplyr_2.4.0 fastmap_1.1.1
## [141] rlang_1.1.3 htmlwidgets_1.6.4
## [143] shiny_1.8.0 rmspc_1.8.0
## [145] jquerylib_0.1.4 farver_2.1.1
## [147] jsonlite_1.8.8 BiocParallel_1.36.0
## [149] GOSemSim_2.28.1 RCurl_1.98-1.14
## [151] magrittr_2.0.3 GenomeInfoDbData_1.2.11
## [153] ggplotify_0.1.2 patchwork_1.2.0
## [155] munsell_0.5.0 Rcpp_1.0.12
## [157] ape_5.7-1 viridis_0.6.5
## [159] reticulate_1.35.0 stringi_1.8.3
## [161] ggraph_2.1.0 brio_1.1.4
## [163] zlibbioc_1.48.0 MASS_7.3-60.0.1
## [165] org.Hs.eg.db_3.18.0 AnnotationHub_3.10.0
## [167] plyr_1.8.9 pkgbuild_1.4.3
## [169] parallel_4.3.2 ggrepel_0.9.5
## [171] Biostrings_2.70.2 graphlayouts_1.1.0
## [173] splines_4.3.2 hms_1.1.3
## [175] locfit_1.5-9.8 ps_1.7.6
## [177] igraph_2.0.1.1 reshape2_1.4.4
## [179] biomaRt_2.58.2 stats4_4.3.2
## [181] pkgload_1.3.4 futile.options_1.0.1
## [183] BiocVersion_3.18.1 XML_3.99-0.16.1
## [185] evaluate_0.23 lambda.r_1.2.4
## [187] BiocManager_1.30.22 foreach_1.5.2
## [189] tzdb_0.4.0 tweenr_2.0.2
## [191] httpuv_1.6.14 tidyr_1.3.1
## [193] purrr_1.0.2 polyclip_1.10-6
## [195] heatmaply_1.5.0 ggplot2_3.4.4
## [197] gridBase_0.4-7 ggforce_0.4.1
## [199] idr2d_1.16.0 MACSr_1.10.0
## [201] xtable_1.8-4 restfulr_0.0.15
## [203] tidytree_0.4.6 roxygen2_7.3.1
## [205] later_1.3.2 viridisLite_0.4.2
## [207] TxDb.Hsapiens.UCSC.hg38.knownGene_3.18.0 tibble_3.2.1
## [209] clusterProfiler_4.10.0 aplot_0.2.2
## [211] registry_0.5-1 memoise_2.0.1
## [213] AnnotationDbi_1.64.1 GenomicAlignments_1.38.2
## [215] IRanges_2.36.0 timechange_0.3.0
## [217] downloadthis_0.3.3